# Importing required libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import plotly.express as px # visualization
%matplotlib inline
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
# Read the UK 2021 census ethnicity counts; the first CSV column holds the row index.
census_2021_path = "Dataset/Ethnic_census_data/2021_data/data_census_2021.csv"
df_census_2021 = pd.read_csv(census_2021_path, index_col=0)
# Preview the first two records
df_census_2021.head(n=2)
| LSOA | LAD | Output_Areas | Asian | Black | White | Others | |
|---|---|---|---|---|---|---|---|
| 0 | E01000001 | City of London | E00000001 | 5 | 3 | 155 | 13 |
| 1 | E01000001 | City of London | E00000003 | 24 | 5 | 209 | 17 |
# Drop the 'Output_Areas' identifier column; the counts are aggregated per LSOA
# further down, so the finer-grained area code is not needed.
df_census_2021.drop(columns=['Output_Areas'], inplace=True)
# Selecting only Bradford for further analysis
df_census_2021 = df_census_2021[df_census_2021["LAD"] == "Bradford"]
# Counting the unique LSOA codes left in the Bradford subset
n = len(pd.unique(df_census_2021["LSOA"]))
print("No.of.unique values :",n)
No.of.unique values : 312
# Checking the shape of the file
df_census_2021.shape
(1575, 6)
# Loading the Census 2021 health topic tables at LSOA level:
#   TS037 general health, TS038 disability,
#   TS039 provision of unpaid care, TS040 disabled people per household.
census2021_ts037_lsoa = pd.read_csv("Dataset/Health/census2021-ts037-lsoa.csv", index_col=0)
census2021_ts038_lsoa = pd.read_csv("Dataset/Health/census2021-ts038-lsoa.csv", index_col=0)
census2021_ts039_lsoa = pd.read_csv("Dataset/Health/census2021-ts039-lsoa.csv", index_col=0)
census2021_ts040_lsoa = pd.read_csv("Dataset/Health/census2021-ts040-lsoa.csv", index_col=0)
# Remove the 'geography' column from every health table before joining them;
# the tables are joined on 'geography code' instead.
for health_table in (
    census2021_ts037_lsoa,
    census2021_ts038_lsoa,
    census2021_ts039_lsoa,
    census2021_ts040_lsoa,
):
    health_table.drop(columns=['geography'], inplace=True)
census2021_ts037_lsoa.shape
(35672, 7)
census2021_ts037_lsoa.head(1)
| geography code | General health: Total: All usual residents | General health: Very good health | General health: Good health | General health: Fair health | General health: Bad health | General health: Very bad health | |
|---|---|---|---|---|---|---|---|
| date | |||||||
| 2021 | E01000001 | 1475 | 859 | 468 | 119 | 18 | 11 |
census2021_ts038_lsoa.shape
(35672, 8)
census2021_ts038_lsoa.head(1)
| geography code | Disability: Total: All usual residents | Disability: Disabled under the Equality Act | Disability: Disabled under the Equality Act: Day-to-day activities limited a lot | Disability: Disabled under the Equality Act: Day-to-day activities limited a little | Disability: Not disabled under the Equality Act | Disability: Not disabled under the Equality Act: Has long term physical or mental health condition but day-to-day activities are not limited | Disability: Not disabled under the Equality Act: No long term physical or mental health conditions | |
|---|---|---|---|---|---|---|---|---|
| date | ||||||||
| 2021 | E01000001 | 1475 | 152 | 36 | 116 | 1323 | 140 | 1183 |
census2021_ts039_lsoa.shape
(35672, 10)
census2021_ts039_lsoa.head(1)
| geography code | Provision of unpaid care: Total: All usual residents aged 5 and over | Provision of unpaid care: Provides no unpaid care | Provision of unpaid care: Provides 19 hours or less unpaid care a week | Provision of unpaid care: Provides 9 hours or less unpaid care a week | Provision of unpaid care: Provides 10 to 19 hours unpaid care a week | Provision of unpaid care: Provides 20 to 49 hours unpaid care a week | Provision of unpaid care: Provides 20 to 34 hours unpaid care a week | Provision of unpaid care: Provides 35 to 49 hours unpaid care a week | Provision of unpaid care: Provides 50 or more hours unpaid care a week | |
|---|---|---|---|---|---|---|---|---|---|---|
| date | ||||||||||
| 2021 | E01000001 | 1422 | 1322 | 67 | 52 | 15 | 18 | 9 | 9 | 15 |
census2021_ts040_lsoa.shape
(35672, 5)
census2021_ts040_lsoa.head(1)
| geography code | Number of disabled people in household: Total: All households | Number of disabled people in household: No people disabled under the Equality Act in household | Number of disabled people in household: 1 person disabled under the Equality Act in household | Number of disabled people in household: 2 or more people disabled under the Equality Act in household | |
|---|---|---|---|---|---|
| date | |||||
| 2021 | E01000001 | 838 | 701 | 123 | 14 |
# Joining the 2021 health tables into one wide table keyed on 'geography code'.
# Left joins keep every row of the general-health table (TS037).
join_data = census2021_ts037_lsoa
for health_table in (census2021_ts038_lsoa, census2021_ts039_lsoa, census2021_ts040_lsoa):
    join_data = pd.merge(left=join_data, right=health_table, how='left', on='geography code')
# Renaming the column in 2021 census data for further joining of data
df_census_2021 = df_census_2021.rename(columns={'LSOA': 'geography code'})
# Sum the ethnic-group counts per LSOA. The count columns are selected
# explicitly so the string column 'LAD' is never aggregated: older pandas
# dropped it silently, newer pandas would concatenate the strings instead.
ethnic_count_columns = ['Asian', 'Black', 'White', 'Others']
df2 = df_census_2021.groupby('geography code')[ethnic_count_columns].sum()
# Joining health data with census data
health_census = pd.merge(left=df2, right=join_data, how='left', on='geography code')
health_census.shape
(312, 31)
# Dataset information for the merged health_census data: number of rows and columns.
n_rows, n_cols = health_census.shape
print(f'The health_census dataframe has {n_rows} rows and {n_cols} columns')
The Employment Rate dataframe has 312 rows and 31 columns
# Give the join key its original 'LSOA' name back now that the merge is done
lsoa_column_name = {'geography code': 'LSOA'}
health_census = health_census.rename(columns=lsoa_column_name)
# Number of distinct LSOAs in the merged dataset
len(health_census['LSOA'].unique())
312
#Description of Datasets
health_census.describe(include='all')
| LSOA | Asian | Black | White | Others | General health: Total: All usual residents | General health: Very good health | General health: Good health | General health: Fair health | General health: Bad health | ... | Provision of unpaid care: Provides 9 hours or less unpaid care a week | Provision of unpaid care: Provides 10 to 19 hours unpaid care a week | Provision of unpaid care: Provides 20 to 49 hours unpaid care a week | Provision of unpaid care: Provides 20 to 34 hours unpaid care a week | Provision of unpaid care: Provides 35 to 49 hours unpaid care a week | Provision of unpaid care: Provides 50 or more hours unpaid care a week | Number of disabled people in household: Total: All households | Number of disabled people in household: No people disabled under the Equality Act in household | Number of disabled people in household: 1 person disabled under the Equality Act in household | Number of disabled people in household: 2 or more people disabled under the Equality Act in household | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 312 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.00000 | 312.000000 | ... | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 |
| unique | 312 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| top | E01010568 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| freq | 1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| mean | NaN | 562.983974 | 35.272436 | 1070.464744 | 82.612179 | 1751.272436 | 808.131410 | 604.685897 | 234.38141 | 79.794872 | ... | 44.064103 | 19.666667 | 37.141026 | 16.557692 | 20.583333 | 45.490385 | 672.689103 | 442.785256 | 180.435897 | 49.467949 |
| std | NaN | 615.979284 | 43.981502 | 540.291566 | 52.050497 | 317.107921 | 171.413432 | 113.371586 | 49.62877 | 29.266976 | ... | 18.212811 | 6.763657 | 15.465114 | 7.008713 | 10.418237 | 13.535706 | 136.319735 | 108.626216 | 44.958885 | 14.602744 |
| min | NaN | 3.000000 | 0.000000 | 45.000000 | 7.000000 | 1056.000000 | 409.000000 | 337.000000 | 101.00000 | 19.000000 | ... | 9.000000 | 5.000000 | 3.000000 | 2.000000 | 0.000000 | 6.000000 | 405.000000 | 240.000000 | 87.000000 | 7.000000 |
| 25% | NaN | 56.000000 | 8.000000 | 546.250000 | 42.000000 | 1543.000000 | 691.500000 | 525.500000 | 203.75000 | 58.750000 | ... | 30.000000 | 15.000000 | 25.000000 | 11.000000 | 12.000000 | 37.000000 | 576.750000 | 363.000000 | 147.000000 | 39.000000 |
| 50% | NaN | 273.500000 | 19.000000 | 1202.000000 | 73.000000 | 1677.500000 | 792.000000 | 587.500000 | 234.00000 | 78.000000 | ... | 41.500000 | 19.000000 | 34.500000 | 16.000000 | 19.000000 | 45.000000 | 654.500000 | 428.000000 | 174.500000 | 48.000000 |
| 75% | NaN | 1054.250000 | 43.000000 | 1491.000000 | 113.250000 | 1941.750000 | 910.750000 | 668.500000 | 269.25000 | 103.000000 | ... | 57.000000 | 24.000000 | 46.250000 | 21.000000 | 27.250000 | 54.000000 | 745.000000 | 506.000000 | 202.250000 | 59.000000 |
| max | NaN | 2286.000000 | 408.000000 | 2361.000000 | 290.000000 | 2852.000000 | 1459.000000 | 1035.000000 | 383.00000 | 178.000000 | ... | 107.000000 | 44.000000 | 87.000000 | 49.000000 | 57.000000 | 99.000000 | 1107.000000 | 869.000000 | 367.000000 | 97.000000 |
11 rows × 31 columns
# Replace the long ONS column headers with short snake-case names,
# grouped here by the topic table each header comes from.
general_health_names = {
    'General health: Total: All usual residents': 'General_Health_Total',
    'General health: Very good health': 'Health_Very_Good',
    'General health: Good health': 'Health_Good',
    'General health: Fair health': 'Health_Fair',
    'General health: Bad health': 'Health_Bad',
    'General health: Very bad health': 'Health_Very_Bad',
}
disability_names = {
    'Disability: Total: All usual residents': 'Disability_Total',
    'Disability: Disabled under the Equality Act': 'Disabled',
    'Disability: Disabled under the Equality Act: Day-to-day activities limited a lot': 'Disability_limited_lot',
    'Disability: Disabled under the Equality Act: Day-to-day activities limited a little': 'Disability_limited_little',
    'Disability: Not disabled under the Equality Act': 'Disability_No',
    'Disability: Not disabled under the Equality Act: Has long term physical or mental health condition but day-to-day activities are not limited': 'Disability_phy_mental_yes',
    'Disability: Not disabled under the Equality Act: No long term physical or mental health conditions': 'Disability_phy_mental_no',
}
unpaid_care_names = {
    'Provision of unpaid care: Total: All usual residents aged 5 and over': 'unpaid_care_total',
    'Provision of unpaid care: Provides no unpaid care': 'unpaid_care_no',
    'Provision of unpaid care: Provides 19 hours or less unpaid care a week': 'unpaid_care_upto19',
    'Provision of unpaid care: Provides 9 hours or less unpaid care a week': 'unpaid_care_upto9',
    'Provision of unpaid care: Provides 10 to 19 hours unpaid care a week': 'unpaid_care_10to19',
    'Provision of unpaid care: Provides 20 to 49 hours unpaid care a week': 'unpaid_care_20to49',
    'Provision of unpaid care: Provides 20 to 34 hours unpaid care a week': 'unpaid_care_20to34',
    'Provision of unpaid care: Provides 35 to 49 hours unpaid care a week': 'unpaid_care_35to49',
    'Provision of unpaid care: Provides 50 or more hours unpaid care a week': 'unpaid_care_above50',
}
household_names = {
    'Number of disabled people in household: Total: All households': 'household_disabled_total',
    'Number of disabled people in household: No people disabled under the Equality Act in household': 'household_disabled_no',
    'Number of disabled people in household: 1 person disabled under the Equality Act in household': 'household_disabled_1',
    'Number of disabled people in household: 2 or more people disabled under the Equality Act in household': 'household_disabled_above1',
}
short_column_names = {
    **general_health_names,
    **disability_names,
    **unpaid_care_names,
    **household_names,
}
health_census = health_census.rename(columns=short_column_names)
# Preview the first two rows with the new names
health_census.head(n=2)
| LSOA | Asian | Black | White | Others | General_Health_Total | Health_Very_Good | Health_Good | Health_Fair | Health_Bad | ... | unpaid_care_upto9 | unpaid_care_10to19 | unpaid_care_20to49 | unpaid_care_20to34 | unpaid_care_35to49 | unpaid_care_above50 | household_disabled_total | household_disabled_no | household_disabled_1 | household_disabled_above1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | E01010568 | 42 | 11 | 1638 | 31 | 1719 | 822 | 605 | 206 | 61 | ... | 74 | 18 | 26 | 13 | 13 | 31 | 825 | 577 | 207 | 41 |
| 1 | E01010569 | 21 | 17 | 1674 | 42 | 1756 | 772 | 583 | 257 | 110 | ... | 50 | 19 | 32 | 13 | 19 | 70 | 841 | 498 | 274 | 69 |
2 rows × 31 columns
# Fix the column order of the dataset: identifier, ethnic-group counts,
# then the general health, disability, unpaid care and household columns.
ordered_columns = (
    ['LSOA', 'Asian', 'Black', 'White', 'Others']
    + ['General_Health_Total', 'Health_Very_Good', 'Health_Good',
       'Health_Fair', 'Health_Bad', 'Health_Very_Bad']
    + ['Disability_Total', 'Disabled', 'Disability_limited_lot',
       'Disability_limited_little', 'Disability_No',
       'Disability_phy_mental_yes', 'Disability_phy_mental_no']
    + ['unpaid_care_total', 'unpaid_care_no', 'unpaid_care_upto19',
       'unpaid_care_upto9', 'unpaid_care_10to19', 'unpaid_care_20to49',
       'unpaid_care_20to34', 'unpaid_care_35to49', 'unpaid_care_above50']
    + ['household_disabled_total', 'household_disabled_no',
       'household_disabled_1', 'household_disabled_above1']
)
health_census = health_census.reindex(columns=ordered_columns)
# Flag rows that repeat an earlier row in full, then keep only those rows
is_duplicate = health_census.duplicated()
duplicateRows = health_census[is_duplicate]
# Display the duplicate rows (an empty frame means there are none)
duplicateRows
| LSOA | Asian | Black | White | Others | General_Health_Total | Health_Very_Good | Health_Good | Health_Fair | Health_Bad | ... | unpaid_care_upto9 | unpaid_care_10to19 | unpaid_care_20to49 | unpaid_care_20to34 | unpaid_care_35to49 | unpaid_care_above50 | household_disabled_total | household_disabled_no | household_disabled_1 | household_disabled_above1 |
|---|
0 rows × 31 columns
print(health_census.duplicated().sum())
0
health_census.isnull().sum().sum()
0
Detecting Outliers. Commonly used practices for treating outliers once they have been detected are listed below. Link: https://www.projectpro.io/article/data-cleaning-techniques/651
Removal: The records containing outliers are removed from the distribution. However, when outliers are present across multiple variables, this method can discard a large portion of the dataset. Replacing values: The outliers can alternatively be treated as missing values and replaced using appropriate imputation. Capping: The maximum and minimum values are capped, and values beyond them are replaced with an arbitrary value or a value taken from the variable's distribution. Discretization: The values are grouped into bins.
fig = plt.figure(figsize=(8,4))
<Figure size 800x400 with 0 Axes>
Some outliers represent natural variations in the population, and they should be left as is in your dataset. These are called true outliers.
# Keep only the float64/int64 columns of the merged dataset
numeric_dtypes = ['float64', 'int64']
df_num = health_census.select_dtypes(include=numeric_dtypes)
df_num.head(n=2)
| Asian | Black | White | Others | General_Health_Total | Health_Very_Good | Health_Good | Health_Fair | Health_Bad | Health_Very_Bad | ... | unpaid_care_upto9 | unpaid_care_10to19 | unpaid_care_20to49 | unpaid_care_20to34 | unpaid_care_35to49 | unpaid_care_above50 | household_disabled_total | household_disabled_no | household_disabled_1 | household_disabled_above1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 42 | 11 | 1638 | 31 | 1719 | 822 | 605 | 206 | 61 | 25 | ... | 74 | 18 | 26 | 13 | 13 | 31 | 825 | 577 | 207 | 41 |
| 1 | 21 | 17 | 1674 | 42 | 1756 | 772 | 583 | 257 | 110 | 34 | ... | 50 | 19 | 32 | 13 | 19 | 70 | 841 | 498 | 274 | 69 |
2 rows × 30 columns
# Font sizes applied to every subplot of the histogram grid
params = dict([
    ('axes.titlesize', '8'),
    ('xtick.labelsize', '12'),
    ('ytick.labelsize', '12'),
])
plt.rcParams.update(params)
# One 50-bin histogram per numeric column; the trailing ';' suppresses the verbose matplotlib repr
df_num.hist(bins=50, figsize=(20, 20), xlabelsize=8, ylabelsize=8);
# Column types of the merged dataset
health_census.dtypes
LSOA object Asian int64 Black int64 White int64 Others int64 General_Health_Total int64 Health_Very_Good int64 Health_Good int64 Health_Fair int64 Health_Bad int64 Health_Very_Bad int64 Disability_Total int64 Disabled int64 Disability_limited_lot int64 Disability_limited_little int64 Disability_No int64 Disability_phy_mental_yes int64 Disability_phy_mental_no int64 unpaid_care_total int64 unpaid_care_no int64 unpaid_care_upto19 int64 unpaid_care_upto9 int64 unpaid_care_10to19 int64 unpaid_care_20to49 int64 unpaid_care_20to34 int64 unpaid_care_35to49 int64 unpaid_care_above50 int64 household_disabled_total int64 household_disabled_no int64 household_disabled_1 int64 household_disabled_above1 int64 dtype: object
The standard deviation (the square root of the variance) tells us about the spread of the data. It tells us how far, on average, the points are from the mean.
# Population standard deviation (ddof=0, the same divisor np.std uses) of each
# numeric column. numeric_only=True skips the string 'LSOA' identifier, which
# cannot be aggregated and makes the computation fail on current pandas.
health_census.std(ddof=0, numeric_only=True)
Asian 614.991346 Black 43.910962 White 539.425019 Others 51.967016 General_Health_Total 316.599327 Health_Very_Good 171.138511 Health_Good 113.189755 Health_Fair 49.549173 Health_Bad 29.220036 Health_Very_Bad 11.213937 Disability_Total 316.598415 Disabled 66.278355 Disability_limited_lot 43.625236 Disability_limited_little 33.679867 Disability_No 286.706793 Disability_phy_mental_yes 38.642430 Disability_phy_mental_no 290.676994 unpaid_care_total 287.605804 unpaid_care_no 269.812600 unpaid_care_upto19 20.783020 unpaid_care_upto9 18.183600 unpaid_care_10to19 6.752809 unpaid_care_20to49 15.440311 unpaid_care_20to34 6.997472 unpaid_care_35to49 10.401527 unpaid_care_above50 13.513997 household_disabled_total 136.101099 household_disabled_no 108.451995 household_disabled_1 44.886778 household_disabled_above1 14.579323 dtype: float64
# Work on a copy so the merged counts in health_census stay untouched.
data = health_census.copy()


def _pct(part, total):
    """Return `part` as a percentage of `total`, rounded to 2 decimal places."""
    return round((part / total) * 100, 2)


# Total population per LSOA = sum of the four ethnic-group counts
data['total_population'] = data['Asian'] + data['Black'] + data['White'] + data['Others']

# Percentage of people in each ethnic group
for ethnic_group in ['Asian', 'Black', 'White', 'Others']:
    data[f'{ethnic_group}_pct'] = _pct(data[ethnic_group], data['total_population'])

# General health: merge "very good" with "good" and "very bad" with "bad".
# The merged counts overwrite 'Health_Very_Good' / 'Health_Very_Bad', so from
# here on those two columns (and their *_pct versions) are the combined bands.
data['Health_Very_Good'] = data['Health_Very_Good'] + data['Health_Good']
data['Health_Very_Bad'] = data['Health_Very_Bad'] + data['Health_Bad']
for health_band in ['Health_Very_Good', 'Health_Fair', 'Health_Very_Bad']:
    data[f'{health_band}_pct'] = _pct(data[health_band], data['General_Health_Total'])

# Disability: each category as a percentage of the disability total
for disability_measure in ['Disabled', 'Disability_limited_lot', 'Disability_limited_little',
                           'Disability_No', 'Disability_phy_mental_yes', 'Disability_phy_mental_no']:
    data[f'{disability_measure}_pct'] = _pct(data[disability_measure], data['Disability_Total'])

# Unpaid care: residents providing any care below 50 hours a week.
# 'unpaid_care_upto19' is already the total of the 'upto9' and '10to19' bands,
# and 'unpaid_care_20to49' the total of '20to34' and '35to49' (e.g. 67 = 52 + 15
# and 18 = 9 + 9 in the first TS039 row), so only the two band totals are added;
# adding the sub-bands as well would count those residents twice.
data['unpaid_care_up9to49'] = data['unpaid_care_upto19'] + data['unpaid_care_20to49']
for care_measure in ['unpaid_care_up9to49', 'unpaid_care_no', 'unpaid_care_above50']:
    data[f'{care_measure}_pct'] = _pct(data[care_measure], data['unpaid_care_total'])

data.head()
| LSOA | Asian | Black | White | Others | General_Health_Total | Health_Very_Good | Health_Good | Health_Fair | Health_Bad | ... | Disabled_pct | Disability_limited_lot_pct | Disability_limited_little_pct | Disability_No_pct | Disability_phy_mental_yes_pct | Disability_phy_mental_no_pct | unpaid_care_up9to49 | unpaid_care_up9to49_pct | unpaid_care_no_pct | unpaid_care_above50_pct | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | E01010568 | 42 | 11 | 1638 | 31 | 1719 | 1427 | 605 | 206 | 61 | ... | 17.04 | 6.05 | 10.99 | 82.96 | 8.61 | 74.35 | 223 | 13.74 | 90.82 | 1.91 |
| 1 | E01010569 | 21 | 17 | 1674 | 42 | 1756 | 1355 | 583 | 257 | 110 | ... | 24.27 | 11.11 | 13.16 | 75.73 | 6.89 | 68.83 | 189 | 11.37 | 89.72 | 4.21 |
| 2 | E01010570 | 62 | 4 | 1409 | 22 | 1497 | 1259 | 479 | 172 | 58 | ... | 17.42 | 7.08 | 10.35 | 82.58 | 9.01 | 73.56 | 208 | 14.50 | 90.24 | 2.30 |
| 3 | E01010571 | 52 | 27 | 1768 | 96 | 1944 | 1588 | 703 | 251 | 84 | ... | 19.00 | 7.65 | 11.35 | 81.00 | 7.81 | 73.19 | 204 | 11.27 | 91.88 | 2.04 |
| 4 | E01010572 | 27 | 4 | 1422 | 29 | 1483 | 1239 | 551 | 196 | 35 | ... | 15.91 | 4.65 | 11.26 | 84.09 | 8.97 | 75.12 | 187 | 13.19 | 91.33 | 1.83 |
5 rows × 49 columns
# Remove the raw count columns so that only 'LSOA' and the percentage
# features remain, then save the result and summarise it.
ethnic_count_columns = ['Asian', 'Black', 'White', 'Others', 'total_population']
health_count_columns = ['General_Health_Total', 'Health_Very_Good', 'Health_Good',
                        'Health_Fair', 'Health_Bad', 'Health_Very_Bad']
disability_count_columns = ['Disability_Total', 'Disabled', 'Disability_limited_lot',
                            'Disability_limited_little', 'Disability_No',
                            'Disability_phy_mental_yes', 'Disability_phy_mental_no']
unpaid_care_count_columns = ['unpaid_care_total', 'unpaid_care_no', 'unpaid_care_upto19',
                             'unpaid_care_upto9', 'unpaid_care_10to19', 'unpaid_care_20to49',
                             'unpaid_care_20to34', 'unpaid_care_35to49', 'unpaid_care_above50',
                             'unpaid_care_up9to49']
household_count_columns = ['household_disabled_total', 'household_disabled_no',
                           'household_disabled_1', 'household_disabled_above1']
count_columns = (ethnic_count_columns + health_count_columns + disability_count_columns
                 + unpaid_care_count_columns + household_count_columns)
data = data.drop(columns=count_columns)
# Persist the processed features for the clustering step
data.to_csv('Dataset/Processed_Data/processed_clustering_data.csv')
data.describe()
| Asian_pct | Black_pct | White_pct | Others_pct | Health_Very_Good_pct | Health_Fair_pct | Health_Very_Bad_pct | Disabled_pct | Disability_limited_lot_pct | Disability_limited_little_pct | Disability_No_pct | Disability_phy_mental_yes_pct | Disability_phy_mental_no_pct | unpaid_care_up9to49_pct | unpaid_care_no_pct | unpaid_care_above50_pct | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 | 312.000000 |
| mean | 29.667853 | 1.905609 | 63.823397 | 4.602724 | 80.613269 | 13.461346 | 5.926154 | 17.326090 | 8.008622 | 9.317788 | 82.673910 | 5.640833 | 77.032692 | 11.423750 | 90.986410 | 2.801250 |
| std | 29.909094 | 2.151954 | 32.173490 | 2.637621 | 3.760570 | 2.189483 | 1.942547 | 3.448461 | 2.179853 | 2.011761 | 3.448461 | 2.374730 | 4.356441 | 2.150007 | 1.312167 | 0.776901 |
| min | 0.200000 | 0.000000 | 2.200000 | 0.490000 | 67.560000 | 6.230000 | 1.700000 | 9.160000 | 2.600000 | 4.490000 | 71.030000 | 1.370000 | 66.690000 | 3.090000 | 87.990000 | 0.350000 |
| 25% | 3.510000 | 0.487500 | 29.125000 | 2.437500 | 78.517500 | 12.175000 | 4.590000 | 14.800000 | 6.667500 | 7.837500 | 80.857500 | 3.550000 | 73.995000 | 10.040000 | 90.080000 | 2.320000 |
| 50% | 15.905000 | 1.140000 | 77.865000 | 4.185000 | 80.675000 | 13.380000 | 5.705000 | 17.060000 | 7.805000 | 9.290000 | 82.940000 | 5.725000 | 76.560000 | 11.430000 | 91.000000 | 2.730000 |
| 75% | 59.372500 | 2.520000 | 92.470000 | 5.972500 | 82.712500 | 14.770000 | 7.232500 | 19.142500 | 9.270000 | 10.685000 | 85.200000 | 7.662500 | 80.192500 | 13.002500 | 91.750000 | 3.210000 |
| max | 94.010000 | 14.300000 | 98.740000 | 15.930000 | 92.070000 | 20.330000 | 12.510000 | 28.970000 | 14.860000 | 14.110000 | 90.840000 | 10.760000 | 86.560000 | 16.860000 | 97.840000 | 5.730000 |
# Hexbin plots with marginal distributions to compare how good health and bad
# health are spread against the Asian population share.
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally.
sns.set(style="ticks")
# Hexbin plot for good health ('Health_Very_Good_pct')
x = data['Asian_pct']
y = data['Health_Very_Good_pct']
sns.jointplot(x=x, y=y, kind="hex", color="#4CB391")
# Hexbin plot for bad health ('Health_Very_Bad_pct')
x = data['Asian_pct']
y = data['Health_Very_Bad_pct']
sns.jointplot(x=x, y=y, kind="hex", color="#4CB391")
<seaborn.axisgrid.JointGrid at 0x7fa9fc23fc40>
# Hexbin plots with marginal distributions to compare how good health and bad
# health are spread against the White population share.
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally.
sns.set(style="ticks")
# Hexbin plot for good health ('Health_Very_Good_pct')
x = data['White_pct']
y = data['Health_Very_Good_pct']
sns.jointplot(x=x, y=y, kind="hex", color="#4CB391")
# Hexbin plot for bad health ('Health_Very_Bad_pct')
x = data['White_pct']
y = data['Health_Very_Bad_pct']
sns.jointplot(x=x, y=y, kind="hex", color="#4CB391")
<seaborn.axisgrid.JointGrid at 0x7fa9ff4cc0d0>
# Hexbin plots with marginal distributions to compare how good health and bad
# health are spread against the Black population share.
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally.
sns.set(style="ticks")
# Hexbin plot for good health ('Health_Very_Good_pct')
x = data['Black_pct']
y = data['Health_Very_Good_pct']
sns.jointplot(x=x, y=y, kind="hex", color="#4CB391")
# Hexbin plot for bad health ('Health_Very_Bad_pct')
x = data['Black_pct']
y = data['Health_Very_Bad_pct']
sns.jointplot(x=x, y=y, kind="hex", color="#4CB391")
<seaborn.axisgrid.JointGrid at 0x7faa02ec7130>
# Bubble charts with an OLS trendline: for each ethnic group's population share,
# plot it against the disabled and the not-disabled percentage in turn.
# The marker size follows the same measure that is shown on the y axis.
for ethnic_group in ("Asian", "White", "Black"):
    for disability_measure in ("Disabled_pct", "Disability_No_pct"):
        figure = px.scatter(
            data_frame=data,
            x=f"{ethnic_group}_pct",
            y=disability_measure,
            size=disability_measure,
            trendline="ols",
            title=f"Relationship Between {ethnic_group} and {disability_measure} in Bradford (2021)",
        )
        figure.show()
# Relationship Between Asian_pct and unpaid_care_no_pct in Bradford (2021)
sns.scatterplot(x='Asian_pct', y='unpaid_care_no_pct', data= data, marker = '+')
<AxesSubplot: xlabel='Asian_pct', ylabel='unpaid_care_no_pct'>
# Relationship Between Asian_pct and unpaid_care_above50_pct in Bradford (2021)
sns.scatterplot(x='Asian_pct', y='unpaid_care_above50_pct', data= data, marker = '+')
<AxesSubplot: xlabel='Asian_pct', ylabel='unpaid_care_above50_pct'>
# Relationship Between Black_pct and unpaid_care_no_pct in Bradford (2021)
sns.scatterplot(x='Black_pct', y='unpaid_care_no_pct', data= data, alpha = 0.1)
<AxesSubplot: xlabel='Black_pct', ylabel='unpaid_care_no_pct'>
# Relationship Between Black_pct and unpaid_care_above50_pct in Bradford (2021)
sns.scatterplot(x='Black_pct', y='unpaid_care_above50_pct', data= data, alpha = 0.1)
<AxesSubplot: xlabel='Black_pct', ylabel='unpaid_care_above50_pct'>
# Relationship Between White_pct and unpaid_care_no_pct in Bradford (2021)
sns.scatterplot(x='White_pct', y='unpaid_care_no_pct', data= data, alpha = 0.1)
<AxesSubplot: xlabel='White_pct', ylabel='unpaid_care_no_pct'>
# Relationship Between White_pct and unpaid_care_above50_pct in Bradford (2021)
sns.scatterplot(x='White_pct', y='unpaid_care_above50_pct', data= data, alpha = 0.1)
<AxesSubplot: xlabel='White_pct', ylabel='unpaid_care_above50_pct'>
# Interactive scatter plots of the White population share against the share
# providing no unpaid care and the share providing 50+ hours a week;
# the other unpaid-care percentages are shown on hover.
hover_columns = ['unpaid_care_up9to49_pct', 'unpaid_care_above50_pct']
for care_measure in ('unpaid_care_no_pct', 'unpaid_care_above50_pct'):
    fig = px.scatter(data, x='White_pct', y=care_measure, hover_data=hover_columns)
    fig.show()